import numpy as np
from collections import Counter


# KNN: a brute-force k-nearest-neighbours classifier
class KNN:
    """Brute-force k-nearest-neighbours classifier.

    Distances between a query point and every training sample are measured
    with ``np.linalg.norm(..., ord=p)``; the predicted label is the majority
    label among the ``n_neighbors`` closest training samples.
    """

    def __init__(self, x_train, y_train, n_neighbors=3, p=2):
        """Store the training data and hyper-parameters.

        Args:
            x_train: Sequence of training samples; each sample must support
                ``X - sample`` with a query point ``X``.
            y_train: Sequence of labels, aligned with ``x_train``. Labels
                must be hashable because they are counted with ``Counter``.
            n_neighbors: Number of nearest neighbours that vote.
            p: Order of the norm passed to ``np.linalg.norm`` (2 gives the
                Euclidean distance for 1-D samples).
        """
        self.n = n_neighbors
        self.p = p
        self.x_train = x_train
        self.y_train = y_train

    def predict(self, X):
        """Return the majority label among the nearest neighbours of ``X``.

        Args:
            X: A single query sample.

        Returns:
            The label (an element of ``y_train``) that occurs most often
            among the nearest neighbours. When several labels share the top
            count, the one whose closest member is nearest to ``X`` wins.

        Raises:
            ValueError: If ``n_neighbors`` is smaller than 1 or the training
                set is empty, so no neighbour can vote.
        """
        # Never ask for more neighbours than there are training samples.
        k = min(self.n, len(self.x_train))
        if k < 1:
            raise ValueError(
                "predict() needs n_neighbors >= 1 and a non-empty training set"
            )

        distances = np.array(
            [np.linalg.norm(X - sample, ord=self.p) for sample in self.x_train],
            dtype=float,
        )
        # A stable sort keeps the earlier training sample first when two
        # samples are equally far away, which makes the result deterministic.
        nearest = np.argsort(distances, kind="stable")[:k]

        # Labels are inserted nearest-first; dicts preserve insertion order
        # and max() returns the first maximal key, so ties on the vote count
        # go to the label that has the closest neighbour.
        votes = Counter(self.y_train[int(i)] for i in nearest)
        return max(votes, key=votes.get)

    def score(self, x_test, y_test):
        """Return the fraction of test samples that are classified correctly.

        Args:
            x_test: Sequence of query samples.
            y_test: Sequence of expected labels, aligned with ``x_test``.

        Returns:
            Number of correct predictions divided by ``len(x_test)``.

        Raises:
            ZeroDivisionError: If ``x_test`` is empty.
        """
        right_count = sum(
            1 for X, y in zip(x_test, y_test) if self.predict(X) == y
        )
        return right_count / len(x_test)
